library(ggplot2)

# Retrieve the gapminder data.
# download.file() replaces the shell call to wget so the script does not
# depend on an external program, and a failed download is reported by R
# instead of being silently ignored. The download is skipped when
# gapminder.csv already exists (wget's default is to save a numbered copy
# rather than overwrite, so the existing file was the one read anyway).
if (!file.exists("gapminder.csv")) {
  download.file(
    "http://hwheeler01.github.io/comp150/ggplot/gapminder.csv",
    destfile = "gapminder.csv"
  )
}

# Load the data.
# read.csv() is used rather than read.table(sep = ","): read.table()'s default
# quote set is "\"'", so an apostrophe inside a field starts a quoted string
# and merges rows, whereas read.csv() only treats the double quote as a quote.
# stringsAsFactors = TRUE keeps country/continent as factors on R >= 4.0,
# matching the str() output recorded in this file.
# NOTE(review): the row and level counts recorded in the output comments of
# this file came from the old read.table() call and may change -- re-run to
# confirm.
gap <- read.csv("gapminder.csv", stringsAsFactors = TRUE)
# Summarise the structure of the data frame: dimensions, column names, types.
utils::str(gap)
## 'data.frame':    1698 obs. of  6 variables:
##  $ country  : Factor w/ 147 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...
# Show the first six rows (six is head()'s default).
head(gap, n = 6L)
##       country continent year lifeExp      pop gdpPercap
## 1 Afghanistan      Asia 1952  28.801  8425333  779.4453
## 2 Afghanistan      Asia 1957  30.332  9240934  820.8530
## 3 Afghanistan      Asia 1962  31.997 10267083  853.1007
## 4 Afghanistan      Asia 1967  34.020 11537966  836.1971
## 5 Afghanistan      Asia 1972  36.088 13079460  739.9811
## 6 Afghanistan      Asia 1977  38.438 14880372  786.1134
# Ask for more rows by passing n explicitly.
head(gap, n = 20L)
##        country continent year lifeExp      pop gdpPercap
## 1  Afghanistan      Asia 1952  28.801  8425333  779.4453
## 2  Afghanistan      Asia 1957  30.332  9240934  820.8530
## 3  Afghanistan      Asia 1962  31.997 10267083  853.1007
## 4  Afghanistan      Asia 1967  34.020 11537966  836.1971
## 5  Afghanistan      Asia 1972  36.088 13079460  739.9811
## 6  Afghanistan      Asia 1977  38.438 14880372  786.1134
## 7  Afghanistan      Asia 1982  39.854 12881816  978.0114
## 8  Afghanistan      Asia 1987  40.822 13867957  852.3959
## 9  Afghanistan      Asia 1992  41.674 16317921  649.3414
## 10 Afghanistan      Asia 1997  41.763 22227415  635.3414
## 11 Afghanistan      Asia 2002  42.129 25268405  726.7341
## 12 Afghanistan      Asia 2007  43.828 31889923  974.5803
## 13     Albania    Europe 1952  55.230  1282697 1601.0561
## 14     Albania    Europe 1957  59.280  1476505 1942.2842
## 15     Albania    Europe 1962  64.820  1728137 2312.8890
## 16     Albania    Europe 1967  66.220  1984060 2760.1969
## 17     Albania    Europe 1972  67.690  2263554 3313.4222
## 18     Albania    Europe 1977  68.930  2509048 3533.0039
## 19     Albania    Europe 1982  70.420  2780097 3630.8807
## 20     Albania    Europe 1987  72.000  3075321 3738.9327
# Show the last six rows (six is tail()'s default).
tail(gap, n = 6L)
##       country continent year lifeExp      pop gdpPercap
## 1693 Zimbabwe    Africa 1982  60.363  7636524  788.8550
## 1694 Zimbabwe    Africa 1987  62.351  9216418  706.1573
## 1695 Zimbabwe    Africa 1992  60.377 10704340  693.4208
## 1696 Zimbabwe    Africa 1997  46.809 11404948  792.4500
## 1697 Zimbabwe    Africa 2002  39.989 11926563  672.0386
## 1698 Zimbabwe    Africa 2007  43.487 12311143  469.7093
# Start with an empty plot: data plus an aesthetic mapping, no layers yet.
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp))

# aes() is short for "aesthetics"; it tells ggplot which columns go on the axes.

# Add a layer of points.
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Log-transform the x-axis.
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

# Map continent to the point colour.
ggplot(
  data = gap,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

# Life expectancy against year, coloured by continent.
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point()

# Same plot, split into one panel per continent.
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point() +
  facet_wrap(~continent)

# Drop Oceania and draw one line per country.
# Filtering rows is done with the dplyr package.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
no_oceania <- dplyr::filter(.data = gap, continent != "Oceania")
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent)

# Keep only a handful of countries and plot them together.
clist <- c("United States", "Mexico", "Canada")
subgap <- dplyr::filter(.data = gap, country %in% clist)
ggplot(
  data = subgap,
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  geom_point()

# Boxplot of life expectancy for each continent.
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()

# Same boxplot with custom axis labels.
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  labs(x = "Continent", y = "Life Expectancy (years)")

# Histogram of life expectancy with the default binning.
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Set the bin width explicitly.
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram(binwidth = 1)

# Density plot of the same variable.
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_density()

# One density curve per continent, distinguished by colour.
ggplot(data = gap, mapping = aes(x = lifeExp, color = continent)) +
  geom_density()

# Faceting works with any geom: histograms ...
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~continent)

# ... and densities, here stacked in a single column.
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1)

### Let's play with colors!

# Per-country life expectancy lines, one panel per continent. The colour
# scales below are each added on top of this shared base plot.
life_exp_lines <- ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent)

# Hand-picked colours.
life_exp_lines +
  scale_color_manual(values = c("red", "purple", "darkgreen", "orange"))

# Print a list of options from the color brewer
library(RColorBrewer)
display.brewer.all()

# Choose a color brewer palette

# type = One of seq (sequential), div (diverging) or qual (qualitative)
# palette = If a string, will use that named palette (see above). If a number, will index into the list of palettes of appropriate type

life_exp_lines +
  scale_color_brewer(palette = "Dark2")

ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1) +
  scale_fill_brewer(type = "seq", palette = 1)

# Change the background theme.
# Every plot below shares the same lines-by-continent base and differs only in
# the colour scale and theme added to it.
themed_base <- ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent)

themed_base +
  scale_color_brewer(type = "div", palette = 2) +
  theme_bw()

themed_base +
  scale_color_brewer(palette = "PuOr") +
  theme_classic()

# Themes and matching colour scales from the ggthemes package.
library(ggthemes)
themed_base +
  theme_economist() +
  scale_color_economist()

themed_base +
  theme_wsj() +
  scale_color_wsj()